rm(list = ls())

#B1BFF00240DAC86C

library(tidyverse)
library(stargazer)
library(lfe)
library(broom) #for tidy
library(lmtest) # For coeftest()
library(sandwich) # For sandwich()
library(estimatr)
library(texreg)
library(psych)
library(default)
library(fixest)

options(tibble.print_min = 10)
source("functions.R")

load(file = 'data_output/persons_years_sample.Rdata')
load(file = 'data_output/persons_years.Rdata')
load(file = 'data_output/persons_sample.Rdata')


full_sample_pos = full_sample %>% 
  filter(inc_labor != 0) %>% 
  mutate(Earnings = inc_log)



default(texreg) = list(caption.above = T,
                       stars = numeric(0),
                       digits = 3,
                       float.pos = "h!",
                       threeparttable = T,
                       dcolumn = T,
                       booktabs = T,
                       use.packages = F)
  
trait_names = c(cog = "Cognitive", noncog =  "Noncognitive", extro =  "Extraversion",
                consci = "Conscientiousness", educ_y = "Education Years",
                vuosi = "Year", synvuosi = "Cohort", shnro = "Individual",
                firm = "Firm", occ_new = "Occupation", educ = "Education",
               employed = "Work Experience", study = "Study", unemp = "Nonemployment", ptoim_other = "Other",
               "occ1==1" = "Managers", "occ1==2" = "Professionals", occ1_clerical = "Technical/Clerical", 
               "occ1==5" = "Service/Sales", occ1_blue = "Production", occ1_other = "Other", occ_Earnings = "Mean Earnings",
               social = "Sociability", energy = "Activity", delib = "Deliberation", duty = "Dutifulness", 
               achieve = "Achievement", conf = "Confidence", leader = "Leadership", 
               math = "Math", words = "Verbal", visuos = "Visual-spatial")

setFixest_etable(
  fixef_sizes = T,
  depvar = F, 
  digits = 3,
  dict = trait_names,
  style.tex = style.tex(#"aer",
    model.title = "",
    notes.tpt.intro = "\\scriptsize",
    signif.code = NA,
    tablefoot = T))

###########################################################################
# Regressions -------------------------------------------------------------
###########################################################################


# Personality predicts education --------------------------------

traits = "social + energy + delib + duty + achieve + conf + leader"

models = list(
  lm_robust(my_formula("educ_y", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("gym", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("gpa_t", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("cog", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("math_anchored", traits),
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("A_standard", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("R_standard", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons),
  lm_robust(my_formula("score", traits), 
            fixed_effects = ~ synvuosi, 
            data = persons))

# Calculate outcome mean from an lm_robust object
outcome_mean = unlist(lapply(models, function(x)pull(model.frame(x), 1) %>% mean)) %>% round(2)

texreg(models,
       file = "/tables/personality_scores.tex",
       custom.header = list("High School Test Scores" = 5:8),
       custom.model.names = c("Educ. Years", "In HS", "9th gr. GPA", "Cognitive", "Math", "Verbal", "Electives", "HS GPA"),
       custom.coef.names = c("Sociability", "Activity", 
                             "Deliberation", "Dutifulness", "Achievement", "Confidence", "Leadership"),
       custom.gof.names = c("Adj. R$^2$", "Observations"),
       custom.gof.rows = list("Outcome mean" = outcome_mean, 
                              "Cohort FE" = rep("yes", 8)),
       caption = "Personality and Academic Performance",
       label = "tab:test_scores",
       custom.note = "\\input{tables/notes_personality_scores.tex}",
       include.rsquared = F,
       include.rmse = F,
       include.ci = F,
       sideways = T)


# Earnings and personality

models = list(
  feols(Earnings ~ cog + noncog | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ cog + extro + consci + educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos)
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)
  
etable(models,
       file = "/tables/personality_earnings.tex",
       title = "Returns to Skills",
       headers = list("Dependent variable: log(Earnings)" = 5),
       label = "tab:earnings",
       depvar = T, 
       tex = T,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_personality_earnings.tex}")
       )

# Earnings and personality extensive vs intensive

models = list(
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos %>% filter(extensive == T)),
  feols(Earnings ~ cog + extro + consci + educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos %>% filter(extensive == T)),
  feols(extensive ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample),
  feols(extensive ~ cog + extro + consci + educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample)
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/personality_earnings_ext_int.tex",
       title = "Returns to Skills - Extensive vs. Intensive Margin",
       headers = list("log(Earnings)" = 2, "Employed" = 2),
       label = "tab:earnings",
       tex = T,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/personality_earnings_ext_int.tex}")
)


# Earnings with bad controls (occupation education) -----------------------

covariate_sample = full_sample_pos %>% 
  filter(vuosi>2003) %>% 
  filter(!is.na(occ_new)) %>% 
  filter(!is.na(firm)) %>% 
  filter(!is.na(industry))

models = list(
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = covariate_sample),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + educ, 
        cluster = ~ shnro, data = covariate_sample),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + occ_new, 
        cluster = ~ shnro, data = covariate_sample),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + industry, 
        cluster = ~ shnro, data = covariate_sample),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + firm, 
        cluster = ~ shnro, data = covariate_sample),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + educ + occ_new + firm + industry, 
        cluster = ~ shnro, data = covariate_sample)
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       tex = T,
       headers = list("Dependent variable: log(Earnings)" = 6),
       file = "/tables/personality_earnings_fe.tex",
       title = "Returns to Skills within Occupation and Education",
       label = "tab:earnings_fe",
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_personality_earnings_fe.tex}")
       )



# Work experience, study, unemployment ------------------------------------


models = list(
  feols(employed ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = persons),
  feols(study ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = persons),
  feols(unemp ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = persons),
  feols(ptoim_other ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = persons))


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)


etable(models,
       file = "/tables/cumulative_activity.tex",
       title = "Cumulative activity, Ages 18-38",
       #headers = list("Baseline" = 4, "With education control" = 4),
       label = "tab:activity",
       tex = T,
       depvar = T,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_cumulative_activity.tex}",
         model.format = "")
)


# Occupational sorting ----------------------------------------------------

# Data manipulation to calculate leave out occupational earnings
# Use all men here
sorting_sample = dta %>% 
  filter(field != "",
         !is.na(field),
         !is.na(occ_new)) %>% 
  add_count(job, vuosi) %>% 
  filter(n > 100)

sorting_sample = sorting_sample %>% 
  group_by(job, vuosi) %>% 
  mutate(occ_Earnings = leaveOutMean(inc_labor)) %>% ungroup

jobs = sorting_sample %>% select(shnro, vuosi, job, occ_Earnings)

# Use only yo sample here
sorting_sample = full_sample %>% left_join(jobs) # join by shnro, vuosi, job


# Estimate

traits = "cog + extro + consci | vuosi^synvuosi"

models = list(
  feols(my_formula("occ1 == 1", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ1 == 2", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ1_clerical", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ1 == 5", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ1_blue", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ1_other", traits),
            cluster = ~ shnro, 
            data = full_sample),
  feols(my_formula("occ_Earnings", traits),
            cluster = ~ shnro, 
            data = sorting_sample),
  feols(educ_y ~ cog + extro + consci | synvuosi, 
            data = persons)
)

outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/sorting_occupations.tex",
       title = "Occupational Sorting",
       headers = list("Occupations" = 6, " " = 2),
       label = "tab:occupation",
       tex = T,
       depvar = T,
       fixef_sizes = F,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_sorting_occupations.tex}",
         model.format = "")
)

# Within occupation returns
models = list(
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1 == 1)),
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1 == 2)),
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1_clerical == T)),
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1==5)),
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1_blue == T)),
  feols(my_formula("Earnings", traits),
        cluster = ~ shnro, 
        data = full_sample_pos %>% filter(occ1_other == T))
)

outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/earnings_occupations.tex",
       title = "Occupational Sorting",
       headers = list("Managers" = 1, "Professionals" = 1, "Technical/Clerical" = 1, 
                      "Service/Sales" = 1, "Production" = 1, "Other" = 1),
       label = "tab:occupation_earnings",
       tex = T,
       depvar = F,
       fixef_sizes = F,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_earnings_occupations.tex}",
         model.format = "")
)

# Robustness --------------------------------------------------------------

# Earnings and personality (levels)

models = list(
  feols(Earnings ~ cog + noncog | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample %>% mutate(Earnings = inc_labor)),
  feols(Earnings ~ extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample %>% mutate(Earnings = inc_labor)),
  feols(Earnings ~ cog + extro + consci | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample %>% mutate(Earnings = inc_labor)),
  feols(Earnings ~ educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample %>% mutate(Earnings = inc_labor)),
  feols(Earnings ~ cog + extro + consci + educ_y | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample %>% mutate(Earnings = inc_labor))
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/personality_earnings_levels.tex",
       title = "Returns to Skills",
       headers = list("Dependent variable: Earnings (2015 euros)" = 5),
       label = "tab:earnings_levels",
       tex = T,
       digits = "r0",
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_personality_earnings_levels.tex}")
)

# Earnings and personality all traits

models = list(
  feols(Earnings ~ social + energy + delib + duty + achieve + conf + leader | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ math + words + visuos | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos),
  feols(Earnings ~ social + energy + delib + duty + achieve + conf + leader +  math + words + visuos | vuosi^synvuosi, 
        cluster = ~ shnro, data = full_sample_pos)
)


outcome_mean = unlist(lapply(models, function(x)(model.matrix(x, type = "lhs")) %>% mean)) %>% round(2)

etable(models,
       file = "/tables/personality_earnings_traits.tex",
       title = "Returns to Skills",
       headers = list("Dependent variable: log(Earnings)" = 3),
       label = "tab:earnings_traits",
       tex = T,
       extralines = list("_Dep. Mean" = outcome_mean),
       tpt = T,
       style.tex = style.tex(#"aer",
         tablefoot.value = "\\input{tables/notes_personality_earnings_traits.tex}")
)

# Decomposition -----------------------------------------------------------

sample_beginning = full_sample_pos %>% 
  filter(vuosi %in% 2004:2006) %>% 
  filter(!is.na(occ_new)) %>% 
  filter(!is.na(firm)) %>% 
  filter(!is.na(industry))

sample_end = full_sample_pos %>% 
  filter(vuosi %in% 2013:2015) %>% 
  filter(!is.na(occ_new)) %>% 
  filter(!is.na(firm)) %>% 
  filter(!is.na(industry))

models_beginning = feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + sw0(educ, industry, firm, occ_new), 
                   cluster = ~ shnro, data = sample_beginning)

models_end = feols(Earnings ~ cog + extro + consci | vuosi^synvuosi + sw0(educ, industry, firm, occ_new), 
      cluster = ~ shnro, data = sample_end)

models_beginning = models_beginning %>% coef
models_end = models_end %>% coef

decomp = models_end - models_beginning
decomp = decomp %>% as_tibble
names(decomp) = c("cog_within", "extro_within", "consci_within")
decomp = decomp %>% 
  mutate(cog_across = cog_within[1]-cog_within,
         extro_across = extro_within[1]-extro_within,
         consci_across = consci_within[1]-consci_within,
         specification = c("baseline", "educ", "industry", "firm", "occ_new"))
decomp = decomp[,c(7,1,4,2,5,3,6)]
decomp

write_csv(decomp, "/tables/decomposition.csv")



